Garmin Data Setup¶
Setting Up Garmin Data with Garmin DB Module¶
Installing the Garmin DB package, which can be found here: https://github.com/tcgoetz/GarminDB
# Install the garmindb package (https://github.com/tcgoetz/GarminDB) into the Colab runtime.
!pip install garmindb
Requirement already satisfied: garmindb in /usr/local/lib/python3.11/dist-packages (3.6.3) Requirement already satisfied: SQLAlchemy==2.0.36 in /usr/local/lib/python3.11/dist-packages (from garmindb) (2.0.36) Requirement already satisfied: python-dateutil==2.9.0.post0 in /usr/local/lib/python3.11/dist-packages (from garmindb) (2.9.0.post0) Requirement already satisfied: cached-property==1.5.2 in /usr/local/lib/python3.11/dist-packages (from garmindb) (1.5.2) Requirement already satisfied: tqdm==4.66.5 in /usr/local/lib/python3.11/dist-packages (from garmindb) (4.66.5) Requirement already satisfied: garth==0.5.2 in /usr/local/lib/python3.11/dist-packages (from garmindb) (0.5.2) Requirement already satisfied: fitfile>=1.1.9 in /usr/local/lib/python3.11/dist-packages (from garmindb) (1.1.9) Requirement already satisfied: tcxfile>=1.0.4 in /usr/local/lib/python3.11/dist-packages (from garmindb) (1.0.4) Requirement already satisfied: idbutils>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from garmindb) (1.1.0) Requirement already satisfied: tornado>=6.4.2 in /usr/local/lib/python3.11/dist-packages (from garmindb) (6.4.2) Requirement already satisfied: pydantic<3.0.0,>=1.10.12 in /usr/local/lib/python3.11/dist-packages (from garth==0.5.2->garmindb) (2.11.1) Requirement already satisfied: requests-oauthlib<3.0.0,>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from garth==0.5.2->garmindb) (2.0.0) Requirement already satisfied: requests<3.0.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from garth==0.5.2->garmindb) (2.32.3) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil==2.9.0.post0->garmindb) (1.17.0) Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy==2.0.36->garmindb) (4.13.0) Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy==2.0.36->garmindb) (3.1.1) Requirement already 
satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=1.10.12->garth==0.5.2->garmindb) (0.7.0) Requirement already satisfied: pydantic-core==2.33.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=1.10.12->garth==0.5.2->garmindb) (2.33.0) Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=1.10.12->garth==0.5.2->garmindb) (0.4.0) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.0->garth==0.5.2->garmindb) (3.4.1) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.0->garth==0.5.2->garmindb) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.0->garth==0.5.2->garmindb) (2.3.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.0->garth==0.5.2->garmindb) (2025.1.31) Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from requests-oauthlib<3.0.0,>=1.3.1->garth==0.5.2->garmindb) (3.2.2)
# Load the Garmin password from Colab secrets and mount Google Drive.
# (The original cell imported json twice; the duplicate is removed.)
from google.colab import userdata
from google.colab import drive
import json
import os

# Mount Google Drive so ingested data can be persisted across sessions.
drive.mount('/content/drive')

# Retrieve the Garmin account password stored as a Colab secret named 'garmin_Pw'.
garmin_pw = userdata.get('garmin_Pw')
Setting up connection to Garmin DB through the configuration file.
# Load the example GarminConnectConfig shipped inside the garmindb package;
# it is the template we customize with real credentials below.
filepath = '/usr/local/lib/python3.11/dist-packages/garmindb/GarminConnectConfig.json.example'
try:
    with open(filepath, 'r') as config_file:
        data = json.load(config_file)
    print(data)
except json.JSONDecodeError:
    print(f"Error: Invalid JSON format in {filepath}")
except FileNotFoundError:
    print(f"Error: File not found at {filepath}")
{'db': {'type': 'sqlite'}, 'garmin': {'domain': 'garmin.com'}, 'credentials': {'user': 'joe@shmoe.com', 'secure_password': False, 'password': 'yourpassword'}, 'data': {'weight_start_date': '12/31/2019', 'sleep_start_date': '12/31/2019', 'rhr_start_date': '12/31/2019', 'monitoring_start_date': '12/31/2019', 'download_latest_activities': 25, 'download_all_activities': 1000}, 'directories': {'relative_to_home': True, 'base_dir': 'HealthData', 'mount_dir': '/Volumes/GARMIN'}, 'enabled_stats': {'monitoring': True, 'steps': True, 'itime': True, 'sleep': True, 'rhr': True, 'weight': True, 'activities': True}, 'course_views': {'steps': []}, 'modes': {}, 'activities': {'display': []}, 'settings': {'metric': False, 'default_display_activities': ['walking', 'running', 'cycling']}, 'checkup': {'look_back_days': 90}}
# Customize the template config for Dan's profile and write it where
# garmindb_cli.py expects to find it (~/.GarminDb, i.e. /root in Colab).

# Credentials: secrets-backed password plus the account email.
data['credentials']['password'] = garmin_pw
data['credentials']['user'] = 'swimmingdog72@gmail.com'

# Per-day data types that need a start date.
listofdates = ['weight_start_date', 'sleep_start_date', 'rhr_start_date', 'monitoring_start_date']
# Limit the bulk download to the 30 most recent activities
# (this value is an activity count, not a number of days).
data['data']['download_all_activities'] = 30
# Pull daily data from the beginning of March onward.
for param in listofdates:
    data['data'][param] = '03/01/2025'

# Disable every stat first, then re-enable just monitoring and steps.
enabledstats = ['monitoring', 'steps', 'itime', 'sleep', 'rhr', 'weight']
for stat in enabledstats:
    data['enabled_stats'][stat] = False
data['enabled_stats']['monitoring'] = True
data['enabled_stats']['steps'] = True

# Create the config directory if needed; exist_ok avoids the race between
# an existence check and the mkdir.
directory = '/root/.GarminDb'
os.makedirs(directory, exist_ok=True)

# Write the customized JSON config.
filepath = '/root/.GarminDb/GarminConnectConfig.json'
try:
    with open(filepath, 'w') as f:
        json.dump(data, f, indent=4)
    print(f"Successfully wrote JSON data to {filepath}")
except Exception as e:
    print(f"An error occurred: {e}")
Successfully wrote JSON data to /root/.GarminDb/GarminConnectConfig.json
Calls Garmin DB package to load data
# Run the GarminDB CLI: download the latest data, import it into GarminDB's
# local SQLite databases, and run the analysis/summary step.
!garmindb_cli.py --all --download --import --analyze --latest
___Downloading Latest Data___
Getting activities: '/root/HealthData/FitFiles/Activities' (25) temp /tmp/tmp6umf4kqm
100% 25/25 [00:30<00:00, 1.20s/activities]
Recent monitoring data not found, using: 2025-03-01 : 34
100% 34/34 [00:36<00:00, 1.08s/days]
100% 34/34 [00:36<00:00, 1.07s/days]
100% 34/34 [00:41<00:00, 1.22s/days]
___Importing Latest Data___
Processing user settings data
Processing profile data
0% 0/1 [00:00<?, ?files/s]Processing profile data: {'measurement_system': 'DisplayMeasure.statute', 'gender': 'Gender.male', 'weight': 154.998224638, 'height': 5.916666555512054, 'vo2max_running': 64.0, 'vo2max_cycling': None, 'handedness': 'right'}
100% 1/1 [00:00<00:00, 13.59files/s]
Processing user personal information data
Processing profile data
0% 0/1 [00:00<?, ?files/s]Processing profile data: {'locale': 'en', 'time_zone': 'America/Denver', 'country_code': 'US'}
100% 1/1 [00:00<00:00, 28.31files/s]
Processing user settings data
Processing profile data
0% 0/1 [00:00<?, ?files/s]Processing profile data: {'id': 381295194, 'userName': 'swimmingdog72@gmail.com', 'name': 'Daniel'}
100% 1/1 [00:00<00:00, 28.55files/s]
Processing [<FileType.settings: 2>] FIT data from /root/HealthData/FitFiles
Processing daily summary data
100% 34/34 [00:00<00:00, 92.05files/s]
100% 34/34 [00:00<00:00, 93.51files/s]
Processing [<FileType.monitoring_b: 32>] FIT data from /root/HealthData/FitFiles/Monitoring
100% 106/106 [00:16<00:00, 6.60files/s]
Processing activities tcx data
Processing latest activities summary data from /root/HealthData/FitFiles/Activities
100% 25/25 [00:00<00:00, 88.70files/s]
Processing activities detail data
100% 25/25 [00:00<00:00, 77.15files/s]
Processing [<FileType.activity: 4>] FIT data from /root/HealthData/FitFiles/Activities
100% 25/25 [00:24<00:00, 1.03files/s]
___Analyzing Data___
Summary Tables Generation:
Generating table entries for 2025
100% 34/34 [00:03<00:00, 10.53days/s]
100% 18/18 [00:00<00:00, 299.29days/s]
100% 52/52 [00:00<00:00, 108.60weeks/s]
100% 2/2 [00:00<00:00, 18.80months/s]
100% 2/2 [00:00<00:00, 278.95months/s]
Loading in Garmin Packages and pandas.
# Import the GarminDB classes used throughout this notebook.
# (The original cell repeated several import lines; they are deduplicated here.)
from garmindb import GarminConnectConfigManager
from garmindb.garmindb import GarminDb, Attributes, ActivitiesDb, Activities, StepsActivities, ActivityLaps, ActivityRecords
from garmindb.garmindb import Weight, Stress, RestingHeartRate, IntensityHR, Sleep
from garmindb.garmindb import MonitoringDb, Monitoring, MonitoringHeartRate, MonitoringIntensity, MonitoringClimb
import fitfile
from idbutils.list_and_dict import list_not_none
import pandas as pd

# Set up GarminDB handles as specified in GarminDB's documentation.
gc_config = GarminConnectConfigManager()
db_params_dict = gc_config.get_db_params()
garmin_db = GarminDb(db_params_dict)
garmin_act_db = ActivitiesDb(db_params_dict)

# Resolve the user's measurement system and the matching display unit strings.
measurement_system = Attributes.measurements_type(garmin_db)
unit_strings = fitfile.units.unit_strings[measurement_system]
distance_units = unit_strings[fitfile.units.UnitTypes.distance_long]
altitude_units = unit_strings[fitfile.units.UnitTypes.altitude]
# NOTE: "tempurature" is the attribute name as spelled inside the fitfile
# library itself — do not "correct" it here.
temp_units = unit_strings[fitfile.units.UnitTypes.tempurature]
Set Up SQLite Database¶
Next we set up SQLite databases to hold the data that we extract from the Garmin DB objects. This is used to make the data more accessible and to create file formats such as parquet and csv files to simulate streaming.
# Build the local SQLite database that mirrors the Garmin data we extract.
import sqlite3

conn = sqlite3.connect("garmin_data.sqlite")
cursor = conn.cursor()

# Drop any tables left over from a previous run so results are reproducible.
for stale_table in ("Activities", "ActivityLaps", "StepsActivity", "activity_record"):
    cursor.execute(f"DROP TABLE IF EXISTS {stale_table}")

# Activity-level summary table: one row per activity.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS Activities (
        id INTEGER PRIMARY KEY,
        name TEXT,
        description TEXT,
        sport TEXT,
        type TEXT,
        distance REAL,
        elapsed_time INTEGER,
        moving_time INTEGER,
        avg_hr INTEGER,
        max_hr INTEGER,
        avg_rr INTEGER,
        max_rr INTEGER,
        ascent REAL,
        descent REAL,
        calories INTEGER,
        training_load REAL,
        avg_temperature REAL,
        start_lat REAL,
        start_long REAL,
        stop_lat REAL,
        stop_long REAL,
        avg_cadence TEXT
    )
""")

# Lap-level table: composite key (activity_id, lap) joins back to Activities.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS ActivityLaps (
        activity_id INTEGER,
        lap INTEGER,
        start_time TEXT,
        stop_time TEXT,
        elapsed_time TEXT,
        moving_time TEXT,
        distance REAL,
        cycles REAL,
        avg_hr INTEGER,
        max_hr INTEGER,
        avg_rr REAL,
        max_rr REAL,
        calories INTEGER,
        avg_cadence INTEGER,
        max_cadence INTEGER,
        avg_speed REAL,
        max_speed REAL,
        ascent REAL,
        descent REAL,
        max_temperature REAL,
        min_temperature REAL,
        avg_temperature REAL,
        start_lat REAL,
        start_long REAL,
        stop_lat REAL,
        stop_long REAL,
        hr_zones_method INTEGER,
        hrz_1_hr INTEGER,
        hrz_2_hr INTEGER,
        hrz_3_hr INTEGER,
        hrz_4_hr INTEGER,
        hrz_5_hr INTEGER,
        hrz_1_time TEXT,
        hrz_2_time TEXT,
        hrz_3_time TEXT,
        hrz_4_time TEXT,
        hrz_5_time TEXT,
        PRIMARY KEY (activity_id, lap)
    );
""")

# Step/running-dynamics table (kept for completeness; not populated later).
cursor.execute("""
    CREATE TABLE IF NOT EXISTS StepsActivity (
        activity_id INT PRIMARY KEY,
        steps INTEGER,
        avg_pace TEXT,
        avg_moving_pace TEXT,
        max_pace TEXT,
        avg_steps_per_min INTEGER,
        max_steps_per_min INTEGER,
        avg_step_length REAL,
        avg_vertical_ratio REAL,
        avg_vertical_oscillation REAL,
        avg_gct_balance REAL,
        avg_ground_contact_time TEXT,
        avg_stance_time_percent REAL,
        vo2_max REAL
    );
""")

# Per-second record table used to feed the streaming simulation.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS activity_record (
        activity_id TEXT,
        record INTEGER,
        timestamp TEXT,
        position_lat REAL,
        position_long REAL,
        distance REAL,
        cadence INTEGER,
        altitude REAL,
        hr INTEGER,
        rr REAL,
        speed REAL,
        temperature REAL,
        PRIMARY KEY (activity_id, record)
    );
""")

conn.commit()
Next, we extract all the activity IDs available in the GarminDB configuration. We will use these to loop through each of the activities' information and add it to our SQLite Database
# Gather every activity id known to GarminDB; these drive the per-activity
# extraction loops below.
activity_ids = [activity.activity_id for activity in Activities.get_all(garmin_act_db)]
Streaming Simulation¶
For the streaming simulation, we are aiming to recreate the process of receiving data via a PySpark Stream from a device like a Garmin watch while the user is out running. In order to do this, we first load the details for a particular activity, input the data into a SQLite database, and then extract from the SQLite DB to create a CSV file that we will use for a streaming simulation.
Downloading Activity Record for a long run (1000+ data points)
# Fetch every per-second record for one long run (activity 18675131831,
# 1000+ data points) to drive the streaming simulation.
activity_record = ActivityRecords.get_activity(garmin_act_db, '18675131831')
Loading the data into a SQLite database
# Load every data point of the long run into SQLite so it can be exported as
# a CSV for the streaming simulation.
# The INSERT statement is loop-invariant, so build it once outside the loop.
insert_query = """
    INSERT INTO activity_record (
        activity_id, record, timestamp, position_lat, position_long, distance, cadence, altitude,
        hr, rr, speed, temperature
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
for record in activity_record:
    # Map the record's attributes onto the table columns; timestamps are
    # stored as ISO-8601 text.
    values = (
        record.activity_id, record.record, record.timestamp.isoformat(),
        record.position_lat, record.position_long, record.distance, record.cadence,
        record.altitude, record.hr, record.rr, record.speed, record.temperature
    )
    cursor.execute(insert_query, values)
# Commit once after all rows are staged (cheaper than a per-row commit).
conn.commit()

# Export the table to CSV; this file feeds the simulated stream below.
# (to_csv with a path returns None, so streaming_csv is just a marker.)
streaming_csv = pd.read_sql("SELECT * FROM activity_record", conn).to_csv('activity_streaming_data.csv', index=False)
Simulating Streaming¶
For the stream, we use a custom simulate_stream generator to feed our csv into PySpark and create a simulated stream. The process flow includes two functions, an inner (simulate_stream) and an outer (process_stream_with_sliding_window).
The inner function takes in the csv file we created from the activity records dataset for the long run activity, creates a temporary csv with the desired batch size (10), and then sends the batch to the processing function.
In the processing function (the outer function), each batch is received, a window aggregation is performed over the last minute of data using a timestamp, and conditions are checked to see whether the batch contains an abnormality (defined here as a high speed but low cadence); if so, it sends an alert and adds the batch to a separate parquet dataset in temporary storage. Throughout the stream, the aggregated data is also appended to an overall dataset that shows each snapshot (every 3-5 seconds) of the activity with some summary statistics.
# PySpark session and schema for the simulated stream.
from pyspark.sql import SparkSession
# Import only the names actually used here and in the streaming cells below
# (replaces the original wildcard imports, which obscure provenance).
from pyspark.sql.functions import avg, to_timestamp, unix_timestamp
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType
from pyspark.sql.window import Window
import time
import os
from shutil import rmtree
import pandas as pd

# Create (or reuse) the Spark session.
spark = SparkSession.builder \
    .appName("SimulateStream") \
    .getOrCreate()

# CSV produced from the SQLite activity_record table above.
csv_file_path = "/content/activity_streaming_data.csv"

# Schema mirrors the activity_record table; timestamp arrives as TEXT and is
# converted to TimestampType during stream processing.
schema = StructType([
    StructField("activity_id", StringType(), True),
    StructField("record", IntegerType(), True),
    StructField("timestamp", StringType(), True),
    StructField("position_lat", DoubleType(), True),
    StructField("position_long", DoubleType(), True),
    StructField("distance", DoubleType(), True),
    StructField("cadence", IntegerType(), True),
    StructField("altitude", DoubleType(), True),
    StructField("hr", IntegerType(), True),
    StructField("rr", DoubleType(), True),
    StructField("speed", DoubleType(), True),
    StructField("temperature", DoubleType(), True)])
def simulate_stream(file_path, interval=5):
    """Yield the activity_record rows as Spark DataFrames in batches of 10.

    Each batch is written to a temp CSV under /tmp/stream_data and read back
    through Spark with the declared schema, simulating micro-batches arriving
    from a device.

    NOTE(review): ``file_path`` is currently unused — rows are pulled from the
    module-level SQLite connection ``conn``; ``interval`` is also unused
    because the pacing sleep is disabled for fast runs. Both are kept for
    interface compatibility.
    """
    # Start from an empty staging directory each run.
    temp_dir = "/tmp/stream_data"
    if os.path.exists(temp_dir):
        rmtree(temp_dir)
    os.makedirs(temp_dir)

    # Full activity pulled from SQLite; batched below.
    df = pd.read_sql("SELECT * FROM activity_record", conn)
    batch_size = 10  # small batches keep the demo fast

    for start_idx in range(0, len(df), batch_size):
        # Clamp the final (possibly short) batch to the end of the frame.
        end_idx = min(start_idx + batch_size, len(df))
        batch_df = df.iloc[start_idx:end_idx]
        # Stage the batch as its own CSV file.
        batch_path = f"{temp_dir}/batch_{start_idx // batch_size}.csv"
        batch_df.to_csv(batch_path, index=False)
        # time.sleep(interval)  # re-enable for real-time pacing
        # Read the staged batch back through Spark with schema and headers.
        batch_spark_df = spark.read.csv(batch_path, header=True, schema=schema)
        yield batch_spark_df
# Module-level placeholder for the accumulated low-cadence batches.
# NOTE(review): process_stream_with_sliding_window below rebinds its own
# local of the same name, so this module-level value is never used by it.
lowCadenceData = None
lowCadenceData
# Stream processing: consume batches, maintain a sliding-window average
# speed, and flag low-cadence/high-speed abnormalities.
def process_stream_with_sliding_window():
    """Consume simulate_stream() batches and process them.

    For each incoming batch:
      * union it into the running stream DataFrame,
      * compute a 60-second range-based sliding-window average speed,
      * alert when a row has speed > 8 but cadence < 85 (moving fast while
        under-striding), persisting those rows to a parquet dataset,
      * append the windowed snapshot to an aggregate parquet dataset.
    """
    window_duration = 60  # look-back window in seconds (1 minute)
    slide_duration = 5    # nominal slide between snapshots, in seconds

    stream_df = None       # union of all batches seen so far
    lowCadenceData = None  # union of all alerted rows, kept for later analysis

    for batch_df in simulate_stream(csv_file_path):
        # Text timestamp -> TimestampType, plus a Unix-seconds column for the
        # range-based window frame.
        batch_df = batch_df.withColumn("timestamp", to_timestamp("timestamp"))
        batch_df = batch_df.withColumn("unix_timestamp", unix_timestamp("timestamp"))

        # Range frame covering the previous window_duration seconds.
        window_spec = Window.orderBy("unix_timestamp").rangeBetween(-window_duration, 0)

        # Accumulate the stream: first batch seeds it, later ones union in.
        stream_df = batch_df if stream_df is None else stream_df.union(batch_df)

        # Average speed over the trailing minute.
        aggregated_df = stream_df.withColumn("avg_speed", avg("speed").over(window_spec))

        # Abnormality: fast (speed > 8) yet low cadence (< 85).
        alert_condition = (batch_df["speed"] > 8) & (batch_df["cadence"] < 85)
        low_cadence_batch = batch_df.filter(alert_condition)
        if low_cadence_batch.count() > 0:
            print("Alert! Cadence is too low! Pick up your feet!")
            # Keep the running union for later analysis.
            if lowCadenceData is None:
                lowCadenceData = low_cadence_batch
            else:
                lowCadenceData = lowCadenceData.union(low_cadence_batch)
            # BUGFIX: append only the NEW alert rows. The original appended the
            # full cumulative lowCadenceData on every alert, re-writing earlier
            # rows into the parquet dataset repeatedly (duplicates are visible
            # in the abnormality output).
            low_cadence_batch.write.format("parquet").mode("append").save("abnormality_streaming.parquet")

        # Emit the snapshot size and persist the windowed view.
        print(aggregated_df.count())
        aggregated_df.write.format("parquet").mode("append").save("aggregated_streaming.parquet")


# Start streaming processing
process_stream_with_sliding_window()
10 20 Alert! Cadence is too low! Pick up your feet! 30 40 Alert! Cadence is too low! Pick up your feet! 50 Alert! Cadence is too low! Pick up your feet! 60 70 Alert! Cadence is too low! Pick up your feet! 80 90 Alert! Cadence is too low! Pick up your feet! 100 Alert! Cadence is too low! Pick up your feet! 110 120 Alert! Cadence is too low! Pick up your feet! 130 140 Alert! Cadence is too low! Pick up your feet! 150 160 170 Alert! Cadence is too low! Pick up your feet! 180 Alert! Cadence is too low! Pick up your feet! 190 Alert! Cadence is too low! Pick up your feet! 200 210 220 Alert! Cadence is too low! Pick up your feet! 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 900 910 920 930 940 950 960 970 980 990 1000 1010 1020 1030
Now we read the parquet files generated during streaming back into Spark DataFrames to examine the content of our data
#read from parquet generated during streaming
# (spark session and the two parquet datasets come from the streaming cells above;
#  "abnomaly_df" keeps its original spelling because later cells reference it)
streaming_df = spark.read.parquet("/content/aggregated_streaming.parquet")
abnomaly_df = spark.read.parquet("/content/abnormality_streaming.parquet")
abnomaly_df.show(10)
+-----------+------+-------------------+------------------+-------------------+------------------+-------+-----------------+---+----+-----------------+-----------+--------------+ |activity_id|record| timestamp| position_lat| position_long| distance|cadence| altitude| hr| rr| speed|temperature|unix_timestamp| +-----------+------+-------------------+------------------+-------------------+------------------+-------+-----------------+---+----+-----------------+-----------+--------------+ |18675131831| 120|2025-03-29 16:32:26| 40.64257367514074|-111.82889529503882| 1.302213837552| 84|4427.165361060001|137|NULL| 8.536147776| NULL| 1743265946| |18675131831| 121|2025-03-29 16:32:32| 40.64264617860317|-111.82914809323847| 1.316430810608| 84| 4424.54068914|137|NULL|8.410879360000001| NULL| 1743265952| |18675131831| 124|2025-03-29 16:32:46| 40.6428140681237|-111.82981319725513| 1.353302977616| 84| 4423.88452116|142|NULL|8.661416191999999| NULL| 1743265966| |18675131831| 126|2025-03-29 16:32:58|40.642979107797146| -111.8302866909653|1.3806992338239998| 84| 4421.25984924|143|NULL|8.473513568000001| NULL| 1743265978| |18675131831| 120|2025-03-29 16:32:26| 40.64257367514074|-111.82889529503882| 1.302213837552| 84|4427.165361060001|137|NULL| 8.536147776| NULL| 1743265946| |18675131831| 121|2025-03-29 16:32:32| 40.64264617860317|-111.82914809323847| 1.316430810608| 84| 4424.54068914|137|NULL|8.410879360000001| NULL| 1743265952| |18675131831| 124|2025-03-29 16:32:46| 40.6428140681237|-111.82981319725513| 1.353302977616| 84| 4423.88452116|142|NULL|8.661416191999999| NULL| 1743265966| |18675131831| 126|2025-03-29 16:32:58|40.642979107797146| -111.8302866909653|1.3806992338239998| 84| 4421.25984924|143|NULL|8.473513568000001| NULL| 1743265978| |18675131831| 120|2025-03-29 16:32:26| 40.64257367514074|-111.82889529503882| 1.302213837552| 84|4427.165361060001|137|NULL| 8.536147776| NULL| 1743265946| |18675131831| 121|2025-03-29 16:32:32| 40.64264617860317|-111.82914809323847| 
1.316430810608| 84| 4424.54068914|137|NULL|8.410879360000001| NULL| 1743265952| +-----------+------+-------------------+------------------+-------------------+------------------+-------+-----------------+---+----+-----------------+-----------+--------------+ only showing top 10 rows
streaming_df.show(10)
+-----------+------+-------------------+------------------+-------------------+--------------------+-------+-----------------+---+----+--------------------+-----------+--------------+--------------------+ |activity_id|record| timestamp| position_lat| position_long| distance|cadence| altitude| hr| rr| speed|temperature|unix_timestamp| avg_speed| +-----------+------+-------------------+------------------+-------------------+--------------------+-------+-----------------+---+----+--------------------+-----------+--------------+--------------------+ |18675131831| 0|2025-03-29 16:22:50| 40.62984290532768|-111.82987497188151| 0.0| 0|4508.530190580001| 92|NULL|0.020132424000000003| NULL| 1743265370|0.020132424000000003| |18675131831| 1|2025-03-29 16:22:51| 40.62984659336507|-111.82984940707684| 0.001360802928| 59| 4507.8740226| 92|NULL| 0.0| NULL| 1743265371|0.010066212000000001| |18675131831| 2|2025-03-29 16:22:56| 40.62988447956741| -111.8296563718468| 0.01183712136| 91| 4505.24935068| 89|NULL| 6.471455848000001| NULL| 1743265376| 2.1638627573333333| |18675131831| 3|2025-03-29 16:23:01|40.629902416840196|-111.82941581122577| 0.024550376112| 89| 4504.5931827| 86|NULL| 9.726197728| NULL| 1743265381| 4.0544465| |18675131831| 4|2025-03-29 16:23:04| 40.62991004437208|-111.82927600108087| 0.031932265968| 87| 4505.24935068| 82|NULL| 8.911953024| NULL| 1743265384| 5.0259478048| |18675131831| 5|2025-03-29 16:23:07| 40.62991582788527|-111.82913082651794| 0.039575131728| 86|4505.905518660001| 85|NULL| 8.954454808000001| NULL| 1743265387| 5.680698972000001| |18675131831| 6|2025-03-29 16:23:08|40.629917085170746| -111.8290347699076| 0.044620665872| 86|4505.905518660001| 93|NULL| 9.35039248| NULL| 1743265388| 6.204940901714287| |18675131831| 7|2025-03-29 16:23:09|40.629917085170746| -111.8290347699076| 0.044620665872| 87|4505.905518660001| 96|NULL| 9.35039248| NULL| 1743265389| 6.5981223490000005| |18675131831| 8|2025-03-29 16:23:12| 40.62992328777909|-111.82889177463949| 
0.052151684816| 87| 4507.21785462|102|NULL| 9.370524904| NULL| 1743265392| 6.9061670773333335| |18675131831| 9|2025-03-29 16:23:15| 40.6299315020442|-111.82875892147422|0.059160751952000004| 86| 4507.8740226|106|NULL| 9.307890696000001| NULL| 1743265395| 7.1463394392| +-----------+------+-------------------+------------------+-------------------+--------------------+-------+-----------------+---+----+--------------------+-----------+--------------+--------------------+ only showing top 10 rows
Batch Ingestion of OLAP-like data¶
Now that we have our streaming data saved, we will move on to the more batch/OLAP-like data available in the Activities and ActivityLaps data sets. The Activities dataset contains activity-level information, such as the max heart rate, average cadence, and other summary statistics for each activity. ActivityLaps adds lap-level granularity to the dataset and can be joined through part of its composite key (Activity ID, Lap) with the overall activities dataset to give a lap-by-lap view.
from datetime import timedelta, datetime, time


def time_to_seconds(t):
    """Convert a datetime.time object to total seconds (microseconds dropped).

    Returns None when *t* is None, mirroring NULL durations in GarminDB.
    """
    return None if t is None else (t.hour * 60 + t.minute) * 60 + t.second
# Clear the SQLite tables so repeated runs produce identical contents.
cursor.execute('DELETE FROM Activities')
cursor.execute('DELETE FROM ActivityLaps')
cursor.execute('DELETE FROM StepsActivity')

# Loop-invariant SQL, hoisted out of the per-activity loop.
activity_insert_sql = """
    INSERT INTO Activities (id, name, description, sport, type, distance, elapsed_time, moving_time, avg_hr, max_hr,
                            avg_rr, max_rr, ascent, descent, calories, training_load, avg_temperature,
                            start_lat, start_long, stop_lat, stop_long, avg_cadence)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
lap_insert_sql = """
    INSERT INTO ActivityLaps (
        activity_id, lap, start_time, stop_time, elapsed_time, moving_time, distance, cycles,
        avg_hr, max_hr, avg_rr, max_rr, calories, avg_cadence, max_cadence, avg_speed,
        max_speed, ascent, descent, max_temperature, min_temperature, avg_temperature,
        start_lat, start_long, stop_lat, stop_long, hrz_1_hr, hrz_2_hr,
        hrz_3_hr, hrz_4_hr, hrz_5_hr, hrz_1_time, hrz_2_time, hrz_3_time, hrz_4_time, hrz_5_time
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

# Copy each activity's summary row and its laps into SQLite, using the
# attribute references specified by the GarminDB docs.
for activity_id in activity_ids:
    activity = Activities.get(garmin_act_db, activity_id)
    # elapsed/moving times are datetime.time objects in GarminDB; store seconds.
    elapsed_seconds = time_to_seconds(activity.elapsed_time)
    moving_seconds = time_to_seconds(activity.moving_time)
    cursor.execute(activity_insert_sql, (
        activity_id, activity.name, activity.description, activity.sport, activity.type,
        activity.distance, elapsed_seconds, moving_seconds, activity.avg_hr, activity.max_hr,
        activity.avg_rr, activity.max_rr, activity.ascent, activity.descent, activity.calories,
        activity.training_load, activity.avg_temperature, activity.start_lat, activity.start_long,
        activity.stop_lat, activity.stop_long, str(activity.avg_cadence)
    ))

    # Lap-level rows (composite key: activity_id, lap).
    laps = ActivityLaps.get_activity(garmin_act_db, activity_id)
    for lap in laps:
        # hr_zones_method may be absent; fall back to NULL.
        # NOTE(review): this value is computed but never inserted — the INSERT
        # column list omits hr_zones_method, so that column stays NULL.
        # Kept as-is to preserve behavior; confirm intent.
        hr_zones_method_value = lap.hr_zones_method.value if lap.hr_zones_method else None
        cursor.execute(lap_insert_sql, (
            lap.activity_id, lap.lap, lap.start_time.isoformat(), lap.stop_time.isoformat(),
            lap.elapsed_time.isoformat(), lap.moving_time.isoformat(), lap.distance, lap.cycles,
            lap.avg_hr, lap.max_hr, lap.avg_rr, lap.max_rr, lap.calories, lap.avg_cadence,
            lap.max_cadence, lap.avg_speed, lap.max_speed, lap.ascent, lap.descent,
            lap.max_temperature, lap.min_temperature, lap.avg_temperature, lap.start_lat,
            lap.start_long, lap.stop_lat, lap.stop_long, lap.hrz_1_hr,
            lap.hrz_2_hr, lap.hrz_3_hr, lap.hrz_4_hr, lap.hrz_5_hr, lap.hrz_1_time.isoformat(),
            lap.hrz_2_time.isoformat(), lap.hrz_3_time.isoformat(), lap.hrz_4_time.isoformat(),
            lap.hrz_5_time.isoformat()
        ))

# Commit once after all activities and laps are staged.
conn.commit()
Next, we extract the data from the SQLite DB and load them into python objects as pandas dataframe for investigation and creation of the combined dataset of summary-level and lap-level data
# Pull the summary and lap tables back out of SQLite as pandas DataFrames.
activities_df = pd.read_sql('SELECT * FROM Activities', conn)
# table name case differs from the CREATE (ActivityLaps); SQLite resolves
# table names case-insensitively, so this works
activity_laps_df = pd.read_sql("SELECT * FROM activityLaps", conn)
#creating the combined data frame
# NOTE(review): add_suffix('_lap') renames EVERY lap column, including
# activity_id -> activity_id_lap, so left_on='activity_id' looks like it
# should raise KeyError here — yet the outputs below show an unsuffixed
# activity_id column. Verify against the exact code version that produced
# those outputs before relying on this cell.
combined_df = activity_laps_df.add_suffix('_lap').merge(activities_df.add_suffix('_activity'), left_on='activity_id', right_on = 'id')#.to_csv('merged_data.csv', index=False)
#Dataframes ready for import:
garmin_summary_Data = combined_df.copy()
aggregated_streaming_data = streaming_df.toPandas()
abnormality_streaming_data = abnomaly_df.toPandas()
Next, we move Dataframes to csvs in Google Drive so that we don't have to call the API/run the notebook every time we worked on this.
# Copy the shared Ingested Data folder locally, then persist the current
# DataFrames as CSVs in Drive so later sessions can skip the API calls above.
!cp -r '/content/drive/My Drive/Data Engineering Final Project/Ingested Data' '/content/'
garmin_summary_Data.to_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/garmin_summary_Data.csv', index=False)
aggregated_streaming_data.to_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/aggregated_streaming_data.csv', index=False)
abnormality_streaming_data.to_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/abnormality_streaming_data.csv', index=False)
Loading¶
Reading CSVs from Google Storage.
# Standalone loading path: read the previously saved CSVs from Drive so the
# ingestion cells above do not need to be re-run.
import pandas as pd
#mount drive to shared folder
from google.colab import drive
drive.mount('/content/drive')
# copy the shared folder locally (kept for parity with the save step)
!cp -r '/content/drive/My Drive/Data Engineering Final Project/Ingested Data' '/content/'
#read in the csvs to dataframes
garmin_summary_Data = pd.read_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/garmin_summary_Data.csv')
aggregated_streaming_data = pd.read_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/aggregated_streaming_data.csv')
abnormality_streaming_data = pd.read_csv('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/abnormality_streaming_data.csv')
Mounted at /content/drive
Examining the data and doing some simple aggregations:
# Spot-check a single activity: the activity-level max HR repeats on every
# lap row while max_hr_lap varies per lap, confirming the summary/lap join.
garmin_summary_Data[garmin_summary_Data['activity_id'] == 18675131831][['activity_id', 'lap', 'max_hr_activity', 'max_hr_lap']]
| activity_id | lap | max_hr_activity | max_hr_lap | |
|---|---|---|---|---|
| 123 | 18675131831 | 0 | 185 | 149 |
| 124 | 18675131831 | 1 | 185 | 145 |
| 125 | 18675131831 | 2 | 185 | 150 |
| 126 | 18675131831 | 3 | 185 | 164 |
| 127 | 18675131831 | 4 | 185 | 170 |
| 128 | 18675131831 | 5 | 185 | 183 |
| 129 | 18675131831 | 6 | 185 | 178 |
| 130 | 18675131831 | 7 | 185 | 174 |
| 131 | 18675131831 | 8 | 185 | 181 |
| 132 | 18675131831 | 9 | 185 | 185 |
| 133 | 18675131831 | 10 | 185 | 182 |
| 134 | 18675131831 | 11 | 185 | 181 |
| 135 | 18675131831 | 12 | 185 | 177 |
# Preview the first rows of the combined summary/lap dataset.
garmin_summary_Data.head()
| activity_id | lap | start_time | stop_time | elapsed_time_lap | moving_time_lap | distance_lap | cycles | avg_hr_lap | max_hr_lap | ... | ascent_activity | descent_activity | calories_activity | training_load | avg_temperature_activity | start_lat_activity | start_long_activity | stop_lat_activity | stop_long_activity | avg_cadence_activity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18658085886 | 0 | 2025-03-27T21:01:50 | 2025-03-27T21:26:39 | 00:22:45.026000 | 00:22:45.026000 | 0.000000 | 90.0 | 72 | 117 | ... | NaN | NaN | 65 | NaN | 260.6 | NaN | NaN | NaN | NaN | 57 |
| 1 | 18619047790 | 0 | 2025-03-23T17:47:19 | 2025-03-23T18:14:30 | 00:27:05.351000 | 00:27:05.351000 | 0.000000 | 466.0 | 88 | 110 | ... | NaN | NaN | 122 | NaN | 260.6 | NaN | NaN | NaN | NaN | 59 |
| 2 | 18648723722 | 0 | 2025-03-26T21:09:20 | 2025-03-26T21:44:39 | 00:34:57.787000 | 00:31:48.010000 | 0.000000 | 62.0 | 67 | 97 | ... | NaN | NaN | 71 | NaN | 260.6 | NaN | NaN | NaN | NaN | 58 |
| 3 | 18694977771 | 0 | 2025-03-30T19:07:56 | 2025-03-30T19:33:54 | 00:25:51.743000 | 00:25:51.743000 | 0.000000 | 299.0 | 95 | 143 | ... | 0.000000 | 0.000000 | 138 | NaN | 260.6 | NaN | NaN | NaN | NaN | 59 |
| 4 | 18648564633 | 0 | 2025-03-26T19:30:07 | 2025-03-26T19:37:37 | 00:07:29.850000 | 00:07:11.677000 | 0.999998 | 613.0 | 116 | 144 | ... | 757.874017 | 725.065618 | 592 | NaN | 260.6 | 40.629936 | -111.829864 | 40.62979 | -111.829716 | 85 |
5 rows × 59 columns
# Derive a calendar date from each lap's start timestamp (used later for the
# per-day bar chart).
garmin_summary_Data['start_date'] = pd.to_datetime(garmin_summary_Data['start_time']).dt.date
# Max heart rate per lap-start timestamp.
# NOTE(review): this groups by 'start_time', not the freshly created
# 'start_date' -- confirm that per-timestamp (rather than per-day)
# aggregation is intended here.
garmin_summary_Data.groupby('start_time')['max_hr_lap'].max()
| max_hr_lap | |
|---|---|
| start_time | |
| 2025-03-17T20:08:14 | 102 |
| 2025-03-17T20:27:56 | 89 |
| 2025-03-18T19:54:45 | 142 |
| 2025-03-18T20:02:10 | 154 |
| 2025-03-18T20:09:31 | 151 |
| ... | ... |
| 2025-04-03T19:56:15 | 177 |
| 2025-04-03T20:00:38 | 173 |
| 2025-04-03T20:03:12 | 172 |
| 2025-04-03T20:12:00 | 172 |
| 2025-04-03T20:21:57 | 145 |
136 rows × 1 columns
Next, we create a simple visualization showing the maximum heart rate during the period examined. This and other visualizations are made possible through the summary and lap data.
import matplotlib.pyplot as plt
# Daily maximum heart rate: group by calendar date and take the highest
# lap-level max HR recorded that day.
max_hr_by_activity = garmin_summary_Data.groupby('start_date')['max_hr_lap'].max()
# Overall average of those daily maxima (drawn as a red reference line).
overall_average_hr = max_hr_by_activity.mean()
# Create the bar chart, one bar per activity date.
plt.figure(figsize=(10, 6))
max_hr_by_activity.plot(kind='bar')
# Add a horizontal line for the overall average max HR
plt.axhline(y=overall_average_hr, color='red', linestyle='--', label=f'Average Max Heart Rate: {overall_average_hr:.2f}')
plt.xlabel('Activity Date')
plt.ylabel('Max Heart Rate')
plt.title('Max Heart Rate by Run')
plt.xticks(rotation=45, ha='right')
plt.legend() # Show the legend for the average line
plt.tight_layout()
plt.show()
# Preview the per-second streaming records (one row per GPS/HR sample).
aggregated_streaming_data.head()
| activity_id | record | timestamp | position_lat | position_long | distance | cadence | altitude | hr | rr | speed | temperature | unix_timestamp | avg_speed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18675131831 | 0 | 2025-03-29 16:22:50 | 40.629843 | -111.829875 | 0.000000 | 0 | 4508.530191 | 92 | NaN | 0.020132 | NaN | 1743265370 | 0.020132 |
| 1 | 18675131831 | 1 | 2025-03-29 16:22:51 | 40.629847 | -111.829849 | 0.001361 | 59 | 4507.874023 | 92 | NaN | 0.000000 | NaN | 1743265371 | 0.010066 |
| 2 | 18675131831 | 2 | 2025-03-29 16:22:56 | 40.629884 | -111.829656 | 0.011837 | 91 | 4505.249351 | 89 | NaN | 6.471456 | NaN | 1743265376 | 2.163863 |
| 3 | 18675131831 | 3 | 2025-03-29 16:23:01 | 40.629902 | -111.829416 | 0.024550 | 89 | 4504.593183 | 86 | NaN | 9.726198 | NaN | 1743265381 | 4.054447 |
| 4 | 18675131831 | 4 | 2025-03-29 16:23:04 | 40.629910 | -111.829276 | 0.031932 | 87 | 4505.249351 | 82 | NaN | 8.911953 | NaN | 1743265384 | 5.025948 |
# Preview the flagged abnormality records from the streaming pipeline.
abnormality_streaming_data.head()
| activity_id | record | timestamp | position_lat | position_long | distance | cadence | altitude | hr | rr | speed | temperature | unix_timestamp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18675131831 | 120 | 2025-03-29 16:32:26 | 40.642574 | -111.828895 | 1.302214 | 84 | 4427.165361 | 137 | NaN | 8.536148 | NaN | 1743265946 |
| 1 | 18675131831 | 121 | 2025-03-29 16:32:32 | 40.642646 | -111.829148 | 1.316431 | 84 | 4424.540689 | 137 | NaN | 8.410879 | NaN | 1743265952 |
| 2 | 18675131831 | 124 | 2025-03-29 16:32:46 | 40.642814 | -111.829813 | 1.353303 | 84 | 4423.884521 | 142 | NaN | 8.661416 | NaN | 1743265966 |
| 3 | 18675131831 | 126 | 2025-03-29 16:32:58 | 40.642979 | -111.830287 | 1.380699 | 84 | 4421.259849 | 143 | NaN | 8.473514 | NaN | 1743265978 |
| 4 | 18675131831 | 120 | 2025-03-29 16:32:26 | 40.642574 | -111.828895 | 1.302214 | 84 | 4427.165361 | 137 | NaN | 8.536148 | NaN | 1743265946 |
Loading to Google Cloud Storage¶
Next, we upload the files to google cloud storage to demonstrate maintaining the database on the cloud for access by other team members.
import os
from google.cloud import storage
# --- Upload the Garmin summary data to Google Cloud Storage ---------------
# Write the DataFrame to a local CSV first; the storage client uploads files.
garmin_summary_Data.to_csv('garmin_summary_Data.csv', index=False)
# Point the client at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/de-final-project-456116-f563a8654c8e.json"
GCP_PROJECT = 'DE final Project'
STORAGE_BUCKET = 'de_final_project_data_storage'
storage_client = storage.Client()                       # create the client
bucket = storage_client.bucket(STORAGE_BUCKET)          # get the bucket instance
blob = bucket.blob('garmin_summary_Data.csv')           # create a new blob
blob.upload_from_filename('garmin_summary_Data.csv')    # upload the local CSV
# Round-trip check: download the object we just uploaded.  The original code
# created `blob_to_download` but then called download on `blob`; use the
# download blob it creates (and the comment said "upload" -- it downloads).
blob_to_download = bucket.blob('garmin_summary_Data.csv')
blob_to_download.download_to_filename('garmin_summary_Data.csv')
Uploading streaming data
# --- Upload the aggregated streaming data to Google Cloud Storage ---------
# Write the DataFrame to a local CSV first; the storage client uploads files.
aggregated_streaming_data.to_csv('aggregated_streaming_data.csv', index=False)
# Point the client at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/de-final-project-456116-f563a8654c8e.json"
GCP_PROJECT = 'DE final Project'
STORAGE_BUCKET = 'de_final_project_data_storage'
storage_client = storage.Client()                           # create the client
bucket = storage_client.bucket(STORAGE_BUCKET)              # get the bucket instance
blob = bucket.blob('aggregated_streaming_data.csv')         # create a new blob
blob.upload_from_filename('aggregated_streaming_data.csv')  # upload the local CSV
# Round-trip check: download the object we just uploaded.  The original code
# created `blob_to_download` but then called download on `blob`; use the
# download blob it creates.
blob_to_download = bucket.blob('aggregated_streaming_data.csv')
blob_to_download.download_to_filename('aggregated_streaming_data.csv')
Uploading abnormality streaming data
# --- Upload the abnormality streaming data to Google Cloud Storage --------
# Write the DataFrame to a local CSV first; the storage client uploads files.
abnormality_streaming_data.to_csv('abnormality_streaming_data.csv', index=False)
# Point the client at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/de-final-project-456116-f563a8654c8e.json"
GCP_PROJECT = 'DE final Project'
STORAGE_BUCKET = 'de_final_project_data_storage'
storage_client = storage.Client()                            # create the client
bucket = storage_client.bucket(STORAGE_BUCKET)               # get the bucket instance
blob = bucket.blob('abnormality_streaming_data.csv')         # create a new blob
blob.upload_from_filename('abnormality_streaming_data.csv')  # upload the local CSV
# Round-trip check: download the object we just uploaded.  The original code
# created `blob_to_download` but then called download on `blob`; use the
# download blob it creates.
blob_to_download = bucket.blob('abnormality_streaming_data.csv')
blob_to_download.download_to_filename('abnormality_streaming_data.csv')
Once all files are loaded, we can now see the 3 csv files in the Google Cloud bucket, ready for analysis and extraction.
Analysis¶
Finally, we use the aggregated streaming data to create a map of one of the recent runs. This map will have 3 layers, each demonstrating a different metric. We plot points on the map using the longitude and latitude associated with each observation. We then create a line between the points and add a tool tip that gives the average speed between the two points. Finally, we use conditionals to color the lines different colors based off speed, cadence, and heart rate.
# Re-inspect the streaming records that feed the map below.
aggregated_streaming_data.head(5)
| activity_id | record | timestamp | position_lat | position_long | distance | cadence | altitude | hr | rr | speed | temperature | unix_timestamp | avg_speed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18675131831 | 0 | 2025-03-29 16:22:50 | 40.629843 | -111.829875 | 0.000000 | 0 | 4508.530191 | 92 | NaN | 0.020132 | NaN | 1743265370 | 0.020132 |
| 1 | 18675131831 | 1 | 2025-03-29 16:22:51 | 40.629847 | -111.829849 | 0.001361 | 59 | 4507.874023 | 92 | NaN | 0.000000 | NaN | 1743265371 | 0.010066 |
| 2 | 18675131831 | 2 | 2025-03-29 16:22:56 | 40.629884 | -111.829656 | 0.011837 | 91 | 4505.249351 | 89 | NaN | 6.471456 | NaN | 1743265376 | 2.163863 |
| 3 | 18675131831 | 3 | 2025-03-29 16:23:01 | 40.629902 | -111.829416 | 0.024550 | 89 | 4504.593183 | 86 | NaN | 9.726198 | NaN | 1743265381 | 4.054447 |
| 4 | 18675131831 | 4 | 2025-03-29 16:23:04 | 40.629910 | -111.829276 | 0.031932 | 87 | 4505.249351 | 82 | NaN | 8.911953 | NaN | 1743265384 | 5.025948 |
# Load folium for interactive map creation.
import folium
from folium import plugins
from folium import FeatureGroup, LayerControl, Map, PolyLine, Tooltip

# Keep only the columns needed for plotting, and take every 10th ROW so we
# don't overwhelm the renderer (the original comment said "columns"), then
# restore chronological order by record number.
map_cadence_data = aggregated_streaming_data[['record', 'position_lat', 'position_long', 'cadence', 'hr', 'speed']].iloc[::10].sort_values(by='record')

# Center the map on the first observation of the run.  (The dead
# `mymap = None` assignment from the original is removed -- it was
# immediately overwritten.)
map_center = [map_cadence_data.loc[0, 'position_lat'], map_cadence_data.loc[0, 'position_long']]
mymap = folium.Map(location=map_center, zoom_start=14)

# One toggleable overlay per metric.
cadence_layer = FeatureGroup(name='Cadence View')
hr_layer = FeatureGroup(name='Heart Rate View')
speed_layer = FeatureGroup(name='Speed View')
# Build the [lat, lon] pairs for the polyline segments.  A comprehension
# replaces the original manual-append loop; the result is identical.
coordinates = [
    [row['position_lat'], row['position_long']]
    for _, row in map_cadence_data.iterrows()
]
# Draw a colored segment between each pair of consecutive points, once per
# metric layer (cadence / heart rate / speed).
for i in range(1, len(coordinates)):
    # The two consecutive rows bounding this segment -- sliced ONCE instead
    # of three times as in the original.
    segment = map_cadence_data.iloc[i - 1:i + 1]
    avg_cadence = segment['cadence'].mean()
    avg_hr = segment['hr'].mean()
    avg_speed = segment['speed'].mean()

    # Popup text shown when a segment is clicked.
    cadence_popup = f"Average Cadence: {avg_cadence:.2f}"
    hr_popup = f"Average HR: {avg_hr:.2f}"
    speed_popup = f"Average Speed: {avg_speed:.2f}"

    # Cadence thresholds: higher cadence shown greener.
    if avg_cadence < 85:
        color_cadence = 'red'
    elif avg_cadence < 86.5:
        color_cadence = 'orange'
    else:
        color_cadence = 'green'

    # Heart-rate thresholds: lower HR shown greener.
    if avg_hr < 140:
        color_hr = 'green'
    elif avg_hr < 165:
        color_hr = 'orange'
    else:
        color_hr = 'red'

    # Speed thresholds: >= 9.5 red, (8.5, 9.5) orange, otherwise green.
    # The original's `(avg_speed > 8.5) & (avg_speed < 9.5)` upper bound is
    # redundant after the failed `>= 9.5` test, and bitwise `&` on booleans
    # is replaced by the idiomatic elif -- behavior is unchanged.
    if avg_speed >= 9.5:
        color_speed = 'red'
    elif avg_speed > 8.5:
        color_speed = 'orange'
    else:
        color_speed = 'green'

    # One PolyLine per layer; the popup is attached to the line itself
    # (add_to returns the PolyLine, so add_child targets the line).
    segment_coords = [coordinates[i - 1], coordinates[i]]
    folium.PolyLine(
        locations=segment_coords,
        color=color_cadence,
        weight=5,
        opacity=0.7
    ).add_to(cadence_layer).add_child(folium.Popup(cadence_popup))
    folium.PolyLine(
        locations=segment_coords,
        color=color_hr,
        weight=5,
        opacity=0.7
    ).add_to(hr_layer).add_child(folium.Popup(hr_popup))
    folium.PolyLine(
        locations=segment_coords,
        color=color_speed,
        weight=5,
        opacity=0.7
    ).add_to(speed_layer).add_child(folium.Popup(speed_popup))
# Attach all three metric overlays to the map
cadence_layer.add_to(mymap)
hr_layer.add_to(mymap)
speed_layer.add_to(mymap)
# Add the layer-selection control so viewers can toggle metrics
LayerControl().add_to(mymap)
# Save the interactive map as an HTML file in the shared Drive folder
mymap.save('/content/drive/My Drive/Data Engineering Final Project/Ingested Data/running_map.html')
# Display the map inline in the notebook
mymap